# 1 ----
# Read the first file found in the ROI data directory and summarise its first
# element. readSubject() is defined outside this section.
roiDir <- "C:/Users/wilson/Desktop/data/"
fileList <- dir(roiDir)
fullPath <- paste0(roiDir, fileList[1])
dat <- readSubject(fullPath)

# Work on the first element of the returned object as a plain data frame.
sub1 <- as.data.frame(dat[[1]])

# Printed, not stored: drop `rawid`, add `icv` (the total `volume` over every
# row whose `roi` is not "CSF", repeated on each row), then drop the
# min/max/mean/std columns.
select(
  mutate(sub1, rawid = NULL, icv = sum(volume * (roi != "CSF"))),
  -min, -max, -mean, -std
)
## roi volume type level icv
## 1 Telencephalon_L 531111 1 1 1268519
## 2 Telencephalon_R 543404 1 1 1268519
## 3 Diencephalon_L 9683 1 1 1268519
## 4 Diencephalon_R 9678 1 1 1268519
## 5 Mesencephalon 10268 1 1 1268519
## 6 Metencephalon 159402 1 1 1268519
## 7 Myelencephalon 4973 1 1 1268519
## 8 CSF 109776 1 1 1268519
# 2 ----
# Load the class-interest table and draw interactive bar charts of the
# students' year, their program, and year split by program.
# NOTE(review): read.table() treats both " and ' as quote characters by
# default; if the file holds an unquoted apostrophe (e.g. Master's) fields may
# be mis-parsed -- confirm against the raw file.
dat <- read.table(
  "C:/Users/wilson/Desktop/classInterests.txt",
  header = TRUE # spelled out: `T` is an ordinary variable that can be reassigned
)

# Fix the ordering of Year so the bars follow academic progression rather than
# alphabetical order; any value not listed in `levels` becomes NA.
dat$Year <- factor(
  dat$Year,
  levels = c("Sophomore", "Junior", "Senior", "Master's", "PhD")
)

p1 <- ggplot(data = dat, aes(x = Year)) +
  geom_bar() +
  labs(title = "Bar plot of students' year", x = "Year", y = "Count")

p2 <- ggplot(data = dat, aes(x = Program)) +
  geom_bar() +
  labs(title = "Bar plot of students' program", x = "Program", y = "Count")

# Stacked bars: one bar per year, filled by program.
p3 <- ggplot(data = dat, aes(x = Year, fill = Program)) +
  geom_bar() +
  theme_minimal() +
  labs(
    title = "Bar plot of students' year and program",
    x = "Year",
    y = "Count"
  )

# Convert each static plot to an interactive plotly widget and display it.
ggplotly(p1)
p2i <- ggplotly(p2)
p2i
p3i <- ggplotly(p3)
p3i
# 3 ----
# Interactive mosaic plot of the Year x Program cross-classification in `dat`,
# with tiles filled by Year.
p4 <- ggplot(dat) +
  geom_mosaic(aes(x = product(Year, Program), fill = Year)) +
  theme(
    # The y-axis title and tick labels are drawn in white -- presumably to
    # hide them against a white background.
    axis.title.y = element_text(vjust = -20, colour = "white"),
    axis.text.y = element_text(colour = "white"),
    # Rotate the x tick labels to vertical.
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  labs(
    title = "Mosaic plot of the class data for year and program",
    x = "Program",
    y = "Year"
  )

p4i <- ggplotly(p4)
p4i
# 4 ----
# Read the expression table, then subtract every row's mean and every column's
# mean from each numeric entry, and show the top-left corner of the result.
dat4 <- read_csv("GSE5859_exprs.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_double(),
## X1 = col_character()
## )
## See spec(...) for full column specifications.

# X1 is the unnamed first column of the file (named by read_csv, see the
# warning above); it is character, so it is set aside before taking means.
dt4 <- dat4 %>% select(-X1)

# Both mean vectors are computed from the uncentred values.
rmv <- rowMeans(dt4)
cmv <- colMeans(dt4)

# sweep() subtracts rmv along rows and then cmv along columns without building
# two full-size replicated matrices by hand; the arithmetic is still
# (x - row mean) - column mean for every cell.
# NOTE(review): the grand mean is not added back, so the result is not
# doubly-centred around zero (the recorded output sits near -5.x) -- confirm
# this is the intended centring.
dt4 <- as.data.frame(sweep(sweep(as.matrix(dt4), 1, rmv), 2, cmv))

# Re-attach the identifier column and print the first 5 rows and 5 columns.
result <- cbind(dat4[, 1], dt4)
head(select(result, 1:5), 5)
## X1 GSM25581.CEL.gz GSM25681.CEL.gz GSM136524.CEL.gz GSM136707.CEL.gz
## 1 1007_s_at -5.753311 -6.368852 -5.952808 -5.613885
## 2 1053_at -5.540006 -5.413417 -5.325963 -5.829139
## 3 117_at -5.420924 -6.373643 -6.163945 -5.806505
## 4 121_at -6.352589 -6.280611 -5.766674 -5.367892
## 5 1255_g_at -5.303813 -5.810665 -5.795717 -5.576371
# 5 ----
# Load the health-care spending table, reshape it to one row per
# location-year, and draw an interactive scatter plot coloured by location.
# The file is read and reshaped once only; the original repeated these four
# statements verbatim, re-reading the file for an identical result.
dat5 <- read_csv("healthcare-spending.csv", skip = 2)
## Parsed with column specification:
## cols(
## .default = col_double(),
## Location = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 12 parsing failures.
## row col expected actual file
## 53 -- 25 columns 1 columns 'healthcare-spending.csv'
## 54 -- 25 columns 1 columns 'healthcare-spending.csv'
## 55 -- 25 columns 1 columns 'healthcare-spending.csv'
## 56 -- 25 columns 1 columns 'healthcare-spending.csv'
## 57 -- 25 columns 1 columns 'healthcare-spending.csv'
## ... ... .......... ......... .........................
## See problems(...) for more details.

# Keep rows 2-52 only. Rows from 53 on are the single-column lines reported as
# parsing failures above; row 1 is presumably a national total -- TODO confirm.
dat5 <- dat5[2:52, ]

# 25 columns: the location label followed by one column per year.
names(dat5) <- c("Location", 1991:2014)

# Wide -> long: one row per Location/Year pair. Year stays a character column.
# NOTE(review): gather() is superseded by tidyr::pivot_longer(); it is kept
# here so the row order and the required tidyr version are unchanged.
dt5 <- gather(dat5, Year, HealthcareSpending, "1991":"2014")

p5 <- ggplot(dt5, aes(x = Year, y = HealthcareSpending, color = Location)) +
  geom_point() +
  theme_minimal() +
  labs(
    title = "Scatter plot of healthcare spending versus time by states.",
    x = "Year",
    y = "Healthcare spending"
  ) +
  # Vertical year labels and a smaller legend font.
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.text = element_text(size = 7)
  )

p5i <- ggplotly(p5)
p5i
# 6 ----
# Average each location's spending over every column of dat5 except the first
# (the location label), then show the averages as an interactive bar chart.
dat6 <- mutate(dat5, AverageHealthCost = rowMeans(dat5[, -1]))

p6 <- ggplot(dat6, aes(x = Location, y = AverageHealthCost)) +
  geom_col() +
  labs(
    title = "Barplot of average health care spending by state",
    x = "Location",
    y = "Average health care spending"
  ) +
  theme_minimal() +
  # Applied after theme_minimal() so the rotated labels are not reset.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

p6i <- ggplotly(p6)
p6i